#### "How do past repression and indoctrination affect redistributive preferences?" ####
# authors: "Pelke, Lars"
# date: 2019-10-23
# written under "R version 3.6.0 (2019-03-11)"

#### Libraries ####

library(countrycode)
library(tidyverse)
library(viridis)
library(readstata13)
library(lme4)
library(performance)
library(sjPlot)
require(Amelia)


#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# set working directory

#### Load data ####

merged_data <- readRDS("data/merged_data_master.rds") 


### Drop Data #####

summary(merged_data$v2exl_legitideol_5)
summary(merged_data$v2x_clphy_5)
summary(merged_data$s_far_Maddison_gdppc_1990_estim_5)
summary(merged_data$s_far_Maddison_pop_estimate_5)

mean.new <- function(v) {
  if (all(is.na(v))) { return(NA) } else { return(mean(v, na.rm=T)) }
}

overview_na_macro_data <- merged_data %>% 
  group_by(data, wave, year, iso3n) %>%
  summarize(mean_e_migdppcln = mean.new(e_migdppcln),
            mean_v2x_polyarchy = mean.new(v2x_polyarchy)) 


merged_data_full <- merged_data %>%
  drop_na(v2x_polyarchy, autocracy, e_migdppcln, s_far_Maddison_pop_estimate_5,
          s_far_Maddison_gdppc_1990_estim_5, v2x_clphy_5, v2exl_legitideol_5, autocracy_5)

#### Construct Multidimensional Redistributive Demand Index ####

sum.new <- function(v) {
  if (all(is.na(v))) { return(NA) } else { return(sum(v, na.rm=T)) }
}

merged_data_full <- merged_data_full %>%
  rowwise() %>%
  mutate(red_pref = sum(government_resp, income_equality, na.rm = TRUE)/2) %>%
  mutate(red_pref = ifelse(is.na(government_resp) & is.na(income_equality), NA,red_pref ))

sum(is.na(merged_data_full$red_pref))
summary(merged_data_full$red_pref)

merged_data_full <- merged_data_full %>%
  drop_na(red_pref)

#### Delete Cohorts with less than 1000 persons 

merged_data_full %>%
  group_by(cohort_5.x) %>%
  count()

merged_data_full <- merged_data_full %>%
  filter(cohort_5.x >= 1910) # delete all respondents which birth cohort is before 1910


#### Extract countries with more >= 10 years of observations ####

merged_data_full2 <- merged_data_full %>%
  group_by(country_name) %>%
  mutate(num_years = max(year) - min(year)) %>%
  filter(num_years >= 10) %>%
  ungroup()

table(merged_data_full2$num_years)

#### Extract countries with more than three surveys #####

num_surveys <- merged_data_full2 %>%
  ungroup() %>%
  group_by(country_name, year) %>%
  count()

num_surveys <- num_surveys %>%
  ungroup() %>%
  select(country_name) 

counts <- data.frame(table(num_surveys))

counts <- counts %>%
  filter(Freq >= 3)

is.na(counts$num_surveys) <- counts$num_surveys == "Singapore"
is.na(counts$num_surveys) <- counts$num_surveys == "Algeria"
is.na(counts$num_surveys) <- counts$num_surveys == "Montenegro"
is.na(counts$num_surveys) <- counts$num_surveys == "Zimbabwe"

counts$num_surveys <- factor(counts$num_surveys)

counts <- counts %>%
  rename(country_name = num_surveys)

country_list <- counts$country_name

merged_data_full2 <- merged_data_full2 %>%
  ungroup() %>%
  dplyr::filter(country_name %in% country_list)

## drop observations with less than 10 individuals per cohort and country ##

count_cohort_countries <- merged_data_full2 %>%
  group_by(cohortmatch5_15, country_name) %>%
  count() %>%
  rename(indidivuals_per_country = n) %>%
  mutate(country_cohort = str_c(country_name, cohortmatch5_15, sep = "_")) %>%
  dplyr::filter(indidivuals_per_country >=10 )

country_cohort_list <- count_cohort_countries$country_cohort # list of country-years with enough data 

# filter merged_data_full 2 with above list 

merged_data_full2 <- merged_data_full2 %>%
  ungroup() %>%
  mutate(country_cohort = str_c(country_name, cohortmatch5_15, sep = "_")) 

class(merged_data_full2$country_cohort)

merged_data_full2 <- merged_data_full2 %>%
  ungroup()%>%
  dplyr::filter(country_cohort %in%country_cohort_list)

#### Delete Germany, problems with merging socialization variables ####

merged_data_full2 <- merged_data_full2 %>%
  ungroup()%>%
  dplyr::filter(country_name != "Germany")


num_waves <- merged_data_full2 %>%
  group_by(country_name, year) %>%
  summarise(wave = max(wave), 
            number_i = n(),
            v2x_regime = max(v2x_regime)) %>%
  mutate(v2x_regime = case_when(v2x_regime== 0 ~"Closed Autocracy", 
                                v2x_regime== 1 ~"Electoral Autocracy", 
                                v2x_regime== 2 ~"Electoral Democracy", 
                                v2x_regime== 3 ~"Liberal Democracy"))

library(stargazer)
stargazer(num_waves, summary=FALSE)

num_countries <-  merged_data_full2 %>% 
  group_by(country_name) %>%
  count()

stargazer(num_countries, summary=FALSE)


merged_data_full2 <- merged_data_full2 %>%
  select(-cohort_5.y) %>%
  rename(cohort_5 = cohort_5.x)



#### Multiple Imputation with Amelia ####

merged_data_full2 <- merged_data_full2 %>% 
  mutate(individual_id = row_number())

summary(merged_data_full2$sex)
summary(merged_data_full2$unemployed)
summary(merged_data_full2$education_3)
summary(merged_data_full2$income_deciles)


merged_data_full2_mi <- merged_data_full2 %>%
  select(v2x_polyarchy, autocracy, e_migdppcln, s_far_Maddison_pop_estimate_5,
         s_far_Maddison_gdppc_1990_estim_5, v2x_clphy_5, v2exl_legitideol_5, autocracy_5, e_peginiwi, e_peginiwi_5, v2x_polyarchy_5, 
         gini_disp, gini_mkt, gini_disp_5, gini_mkt_5, 
         unemployed, education_3, sex, income_deciles, age, time_under_autocracy, time_under_autocracy_15,
         red_pref, government_resp, income_equality, 
         cohort_5, cohortmatch5_15, cohortmatch5_20, year, country_name, country_id, data, individual_id)

merged_data_full2_mi <- as.data.frame(merged_data_full2_mi)

class(merged_data_full2_mi)

require(Amelia)
library(foreign)

a.merged_data_full2 <- amelia(merged_data_full2_mi, 
                              idvars=c("v2x_polyarchy", "autocracy", "e_migdppcln", "s_far_Maddison_pop_estimate_5",
                                        "s_far_Maddison_gdppc_1990_estim_5", "v2x_clphy_5", "v2exl_legitideol_5", "autocracy_5", 
                                        "red_pref",  "e_peginiwi", "e_peginiwi_5", "v2x_polyarchy_5", 
                                        "gini_disp", "gini_mkt", "gini_disp_5", "gini_mkt_5", 
                                        "cohortmatch5_15", "cohortmatch5_20", "country_id", "data", "individual_id" ), m=10, 
                              ords=c("education_3", "unemployed", "income_deciles", "sex"),
                              ts = "year", cs = "country_name", p2s=0)

summary(a.merged_data_full2)
summary(a.merged_data_full2$imputations[[1]])

write.amelia(obj=a.merged_data_full2, file.stem = "data/data", format = "dta")



# Bayesian Factor Analysis #

library(MCMCpack)

posterior_red_pref <- MCMCfactanal(~income_equality + government_resp, factors=1,
                                   verbose=0, store.scores=FALSE, a0=1, b0=0.15,
                                   data=merged_data_full2, burnin=500, mcmc=1000, thin=20)

## First, the loadings (Lambda). ##

loadings.red_pref <- summary(posterior_red_pref)$statistics[1:2]
loadings.red_pref
ci.loadings.red_pref <- summary(posterior_red_pref)$quantiles[1:2,c(1,5)]
ci.loadings.red_pref

## Second, Uniqueness ##
uniquenesses.red_pref <- data.frame(summary(posterior_red_pref)$statistics[3:4])
names(uniquenesses.red_pref)[1] <- "uniquenesses"
uniquenesses.red_pref


#### Density Plots ####


library(lme4)
library(performance)
library(ggpubr)

#### Figure 1: Distibutions of DVs ####

merged_data_full2_density <- merged_data_full2 %>%
  group_by(country_name, cohortmatch5_15) %>%
  summarize(v2x_clphy_5 = mean(v2x_clphy_5), 
            v2exl_legitideol_5 = mean(v2exl_legitideol_5), 
            autocracy_5 = first(autocracy_5))

F1 <- ggplot(merged_data_full2_density, aes(x= v2x_clphy_5, group = as.factor(autocracy_5), color = as.factor(autocracy_5), 
                                            linetype = as.factor(autocracy_5))) +
  geom_density(alpha = 0.2) +
  theme_pubr() +
  labs(x = "Physical Integrity", 
       y = "Density", 
       color = "Autocracy", linetype = "Autocracy") +
  scale_color_brewer(palette = "Dark2")

F2 <- ggplot(merged_data_full2_density, aes(x= v2exl_legitideol_5, group = as.factor(autocracy_5), 
                                            color = as.factor(autocracy_5), linetype = as.factor(autocracy_5))) +
  geom_density(alpha = 0.2) +
  theme_pubr() +
  labs(x = "Indoctrination", 
       y = "Density", 
       color = "Autocracy", 
       linetype = "Autocracy") +
  scale_color_brewer(palette = "Dark2")

ggarrange(F1, F2)

ggsave("output/Figure_1_density.pdf", width = 20 , height = 12 , units = c("cm"))

#### Colorpleth Maps for repression and indoctrination at 1930, 1950 and 1970 ####

library(ggplot2)
library(dplyr)
require(maps)
require(viridis)
theme_set(
  theme_void()
)
library(ggpubr)

## Load World Map ##

world_map <- map_data("world")
ggplot(world_map, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill="white", colour = "black")

#### data management ####

data_1940 <- merged_data_full2 %>%
  group_by(country_name, cohortmatch5_15) %>% 
  summarize(v2x_clphy_5 = mean(v2x_clphy_5), 
            v2exl_legitideol_5 = mean(v2exl_legitideol_5), 
            autocracy_5 = first(autocracy_5)) %>%
  filter(cohortmatch5_15 == 1940) %>%
  rename(region = country_name) %>%
  mutate(region = ifelse(region == "United States of America", "USA", region)) 

data_1940 <- left_join(world_map, data_1940, by = "region")


data_1960 <- merged_data_full2 %>%
  group_by(country_name, cohortmatch5_15) %>% 
  summarize(v2x_clphy_5 = mean(v2x_clphy_5), 
            v2exl_legitideol_5 = mean(v2exl_legitideol_5), 
            autocracy_5 = first(autocracy_5)) %>%
  filter(cohortmatch5_15 == 1960) %>%
  rename(region = country_name) %>%
  mutate(region = ifelse(region == "United States of America", "USA", region)) 

data_1960 <- left_join(world_map, data_1960, by = "region")


data_1980  <- merged_data_full2 %>%
  group_by(country_name, cohortmatch5_15) %>% 
  summarize(v2x_clphy_5 = mean(v2x_clphy_5), 
            v2exl_legitideol_5 = mean(v2exl_legitideol_5), 
            autocracy_5 = first(autocracy_5)) %>%
  filter(cohortmatch5_15 == 1980) %>%
  rename(region = country_name) %>%
  mutate(region = ifelse(region == "United States of America", "USA", region)) 

data_1980 <- left_join(world_map, data_1980, by = "region")

#### Repression at c ####

title <- theme(
  plot.title = element_text(hjust = 0.5),
  plot.subtitle = element_text(hjust = 0.5), 
  legend.position = "right"
)

# 1940
F2a <- ggplot(data_1940, aes(long, lat, group = group))+
  geom_polygon(aes(fill = v2x_clphy_5), color = "black", size = 0.05)+
  scale_fill_viridis_c(option = "inferno", na.value="white") +
  labs(fill = "",
       title = "Physical Integrity 1940") + 
  title 

# 1960
F2b <- ggplot(data_1960, aes(long, lat, group = group))+
  geom_polygon(aes(fill = v2x_clphy_5), color = "black", size = 0.05)+
  scale_fill_viridis_c(option = "inferno", na.value="white") +
  labs(fill = "",
       title = "Physical Integrity 1960") + 
  title 

# 1980
F2c <- ggplot(data_1980, aes(long, lat, group = group))+
  geom_polygon(aes(fill = v2x_clphy_5), color = "black", size = 0.05)+
  scale_fill_viridis_c(option = "inferno", na.value="white") +
  labs(fill = "",
       title = "Physical Integrity 1980") + 
  title 

ggarrange(F2a, F2b, F2c, ncol = 1, nrow = 3, common.legend = TRUE)
ggsave("Output/F2_repression_at_c.pdf", height = 30, width = 20, units= c("cm"))

#### Indoctrination at c ####

title <- theme(
  plot.title = element_text(hjust = 0.5),
  plot.subtitle = element_text(hjust = 0.5), 
  legend.position = "right"
)

# 1940
F3a <- ggplot(data_1940, aes(long, lat, group = group))+
  geom_polygon(aes(fill = v2exl_legitideol_5), color = "black", size = 0.05)+
  scale_fill_viridis_c(option = "inferno", na.value="white") +
  labs(fill = "",
       title = "Indoctrination 1940") + 
  title 

# 1960
F3b <- ggplot(data_1960, aes(long, lat, group = group))+
  geom_polygon(aes(fill = v2exl_legitideol_5), color = "black", size = 0.05)+
  scale_fill_viridis_c(option = "inferno", na.value="white") +
  labs(fill = "",
       title = "Indoctrination 1960") + 
  title 

# 1980
F3c <- ggplot(data_1980, aes(long, lat, group = group))+
  geom_polygon(aes(fill = v2exl_legitideol_5), color = "black", size = 0.05)+
  scale_fill_viridis_c(option = "inferno", na.value="white") +
  labs(fill = "",
       title = "Indoctrination 1980") + 
  title 

ggarrange(F3a, F3b, F3c, ncol = 1, nrow = 3, common.legend = TRUE)
ggsave("Output/F3_indoctrination_at_c.pdf", height = 30, width = 20, units= c("cm"))


#### Intro-Plot Figure 1  ####

## Poland ##

data_intro_poland <- merged_data_full2 %>%
  filter(country_name == "Poland", data == "ESS") %>%
  group_by(cohort_5) %>%
  summarize(mean_red_pref = mean(red_pref), 
            sd = sd(red_pref)) %>%
  mutate(cohort_5_birth = case_when(cohort_5==1910 ~ "1910-1915", 
                                    cohort_5==1915 ~ "1916-1920",
                                    cohort_5==1920 ~ "1921-1925", 
                                    cohort_5==1925 ~ "1926-1930", 
                                    cohort_5==1930 ~ "1931-1935", 
                                    cohort_5==1935 ~ "1936-1940", 
                                    cohort_5==1940 ~ "1941-1945", 
                                    cohort_5==1945 ~ "1946-1950", 
                                    cohort_5==1950 ~ "1951-1955", 
                                    cohort_5==1955 ~ "1956-1960", 
                                    cohort_5==1960 ~ "1961-1965", 
                                    cohort_5==1965 ~ "1966-1970", 
                                    cohort_5==1970 ~ "1971-1975",
                                    cohort_5==1975 ~ "1976-1980",
                                    cohort_5==1980 ~ "1981-1985",
                                    cohort_5==1985 ~ "1986-1990",
                                    cohort_5==1990 ~ "1991-1995",
                                    cohort_5==1995 ~ "1996-2000",
                                    cohort_5==2000 ~ "2001-2005")) %>%
  filter(cohort_5_birth != "1910-1915")

F1 <- ggplot(data_intro_poland, aes(x =cohort_5_birth, y =mean_red_pref)) +
  geom_point() +
  geom_errorbar(aes(ymin=mean_red_pref-sd, ymax=mean_red_pref+sd), width=.1) +
  coord_flip() +
  theme_bw()+
  labs(y= "Redistributive Preferences", 
       x = "Birth Cohorts", 
       title = "Poland") 
  

## Portugal ##

data_intro_russia <- merged_data_full2 %>%
  filter(country_name == "Russia", data == "ESS") %>%
  group_by(cohort_5) %>%
  summarize(mean_red_pref = mean(red_pref), 
            sd = sd(red_pref)) %>%
  mutate(cohort_5_birth = case_when(cohort_5==1910 ~ "1910-1915", 
                                    cohort_5==1915 ~ "1916-1920",
                                    cohort_5==1920 ~ "1921-1925", 
                                    cohort_5==1925 ~ "1926-1930", 
                                    cohort_5==1930 ~ "1931-1935", 
                                    cohort_5==1935 ~ "1936-1940", 
                                    cohort_5==1940 ~ "1941-1945", 
                                    cohort_5==1945 ~ "1946-1950", 
                                    cohort_5==1950 ~ "1951-1955", 
                                    cohort_5==1955 ~ "1956-1960", 
                                    cohort_5==1960 ~ "1961-1965", 
                                    cohort_5==1965 ~ "1966-1970", 
                                    cohort_5==1970 ~ "1971-1975",
                                    cohort_5==1975 ~ "1976-1980",
                                    cohort_5==1980 ~ "1981-1985",
                                    cohort_5==1985 ~ "1986-1990",
                                    cohort_5==1990 ~ "1991-1995",
                                    cohort_5==1995 ~ "1996-2000",
                                    cohort_5==2000 ~ "2001-2005")) %>%
  filter(cohort_5_birth != "1910-1915")

F2 <- ggplot(data_intro_russia, aes(x =cohort_5_birth, y =mean_red_pref)) +
  geom_point() +
  geom_errorbar(aes(ymin=mean_red_pref-sd, ymax=mean_red_pref+sd), width=.1) +
  coord_flip() +
  theme_bw()+
  labs(y= "Redistributive Preferences", 
       x = "Birth Cohorts", 
       title = "Russia") 
  
ggarrange(F1, F2)
ggsave("Output/Figure_Intro.pdf", dpi = 800, height = 15, width = 30, units= c("cm"))


  
  
  
table(merged_data_full2$data)


